import os
import re
import pandas as pd
import chardet
import sys
# 1. 获取当前文件夹下的所有csv文件 
# 2. 遍历这些csv文件，如果这些文件中存在“经营范围”列，则删除该列并保存 
# 3. 去除这些csv文件中每个单元格内的空格以及特殊字符 
# 4. 从当前文件夹下的“sp.txt”文件中获取需要删除的特殊字符

# List the CSV files found directly inside a folder
def get_csv_files(current_dir):
    """Return the names of entries in ``current_dir`` that end with ".csv".

    Only bare entry names are returned (not joined with ``current_dir``), in
    the order ``os.listdir`` yields them. The suffix check is case-sensitive
    and sub-folders are not searched.
    """
    return [entry for entry in os.listdir(current_dir) if entry.endswith(".csv")]

# Read a CSV as strings, trying gb18030 first and falling back to a detected encoding
def _read_csv_with_fallback(file_path):
    """Load ``file_path`` into an all-string DataFrame and report its encoding.

    The file is first decoded as gb18030. If that fails to decode or parse,
    the raw bytes are handed to ``chardet`` and the read is retried with the
    encoding it reports.

    Returns:
        A ``(df, encoding)`` tuple, where ``encoding`` is the codec that
        successfully decoded the file, so callers can write the file back
        without silently switching its encoding.

    Raises:
        Whatever ``pandas.read_csv`` raises if the retry fails as well.
    """
    try:
        df = pd.read_csv(file_path, encoding='gb18030', dtype=str, low_memory=False)
        return df, 'gb18030'
    except (UnicodeError, pd.errors.ParserError):
        with open(file_path, 'rb') as f:
            detected = chardet.detect(f.read())['encoding']

    # chardet reports None when it cannot guess; UTF-8 is what pandas would
    # have used for encoding=None anyway, so make that explicit.
    encoding = detected or 'utf-8'
    df = pd.read_csv(file_path, encoding=encoding, dtype=str, low_memory=False)
    return df, encoding


# Drop the "经营范围" column and save the file in place
def remove_business_scope_column(file_path):
    """Remove the "经营范围" column from the CSV at ``file_path``, if present.

    The file is only rewritten when the column exists. It is written back
    without the index and in the same encoding it was read with, so a later
    pass over the same file decodes it correctly.
    """
    df, encoding = _read_csv_with_fallback(file_path)

    if "经营范围" not in df.columns:
        return

    df.drop(columns="经营范围").to_csv(file_path, index=False, encoding=encoding)


# Strip the given special characters from every cell and save the file in place
def remove_special_characters(file_path, special_chars):
    """Delete every character of ``special_chars`` from all cells of the CSV.

    Args:
        file_path: CSV file to clean; it is overwritten in place.
        special_chars: String whose individual characters are removed from
            each cell. Whitespace is removed only if it appears in this
            string. An empty string leaves the file untouched.

    Column headers are not modified. The file is written back without the
    index and in the same encoding it was read with.
    """
    # An empty set would build the invalid regex "[]"; there is nothing to strip.
    if not special_chars:
        return

    df, encoding = _read_csv_with_fallback(file_path)

    # re.escape keeps characters such as "]", "^", "-" and "\" literal inside
    # the character class.
    pattern = '[{}]'.format(re.escape(special_chars))
    df = df.replace({pattern: ''}, regex=True)
    df.to_csv(file_path, index=False, encoding=encoding)

# Load the characters to delete from the sp.txt-style text file
def get_special_characters(txt_file_path):
    """Return the UTF-8 text of ``txt_file_path`` with outer whitespace stripped.

    Only leading and trailing whitespace is removed; whitespace between other
    characters in the file is kept as part of the returned string.
    """
    with open(txt_file_path, 'r', encoding='utf-8') as handle:
        return handle.read().strip()

# Main program
def main(function_num, csv_folder_path, txt_file_path):
    """Run the selected clean-up on every CSV file in ``csv_folder_path``.

    Args:
        function_num: Mode selector as a string. '1' drops the "经营范围"
            column, '2' strips the special characters, '3' does both (column
            first, then characters). Any other value prints an error message
            and returns without touching the folder.
        csv_folder_path: Folder whose ".csv" files are processed in place.
        txt_file_path: Text file holding the characters to strip; it is only
            read for modes '2' and '3'.
    """
    drop_column = function_num in ('1', '3')
    strip_chars = function_num in ('2', '3')

    # Reject an unknown mode before doing any file-system work.
    if not (drop_column or strip_chars):
        print('功能输入错误')
        return

    csv_files = get_csv_files(csv_folder_path)
    special_chars = get_special_characters(txt_file_path) if strip_chars else None

    for file_name in csv_files:
        # os.path.join picks the right separator for the host OS instead of
        # a hard-coded Windows backslash.
        file_path = os.path.join(csv_folder_path, file_name)
        if drop_column:
            remove_business_scope_column(file_path)
        if strip_chars:
            remove_special_characters(file_path, special_chars)

    print("处理完成！")

if __name__ == '__main__':
    # Three positional arguments are required after the script name:
    # mode, CSV folder path, and special-characters text file path.
    # Any extra arguments are ignored.
    if len(sys.argv) >= 4:
        function_num, csv_folder_path, txt_file_path = sys.argv[1:4]
        main(function_num, csv_folder_path, txt_file_path)
    else:
        print("请提供操作指令(删除经营范围列1,删除特殊字符串2,both3)、csv文件夹路径和txt文件路径作为命令行参数。")